Real-life examples¶
To better appreciate the performance of the model, it is also interesting to test it on real-life recordings (for which we have no ground truth for either the impulsive or the stationary source).
In [38]:
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, Audio
In [39]:
# HPSS
import torch
from rendering.is3.model_wrapper import ModelWrapper
from rendering.is3.baselines import wavelet_script
from rendering.is3.baselines import hpss
from rendering.is3.dataloader_numpy import ImpulsiveStationarySeparation
# Sampling rate used when loading audio, configuring the wavelet baseline
# and playing back results.
sr = 44100

# HPSS baseline, margin 1.
hpss_module = hpss.HarmonicPercussiveDecomposition(
    nfft=2048, window_size=2048, overlap=0.75, margin=1.0
)

# Same HPSS settings, margin 2.
hpss_module_2 = hpss.HarmonicPercussiveDecomposition(
    nfft=2048, window_size=2048, overlap=0.75, margin=2.0
)

# Wavelet-filtering baseline.
wavelet_module = wavelet_script.WaveletBaseline(
    wavelet="db",
    level=13,
    sr=sr,
    ks=2.0,
    ks_impulse=6.0,
    kc=1.0,
    kernel_size=1025,
)

# IS³ model built from configuration "014".
model = ModelWrapper(conf_name="014", job_id=None)

# Binding the result to `_` keeps the returned object from being echoed as
# the cell output. NOTE(review): presumably eval() switches the wrapped
# network to evaluation mode (torch convention) — confirm in ModelWrapper.
_ = model.eval()
In [40]:
import librosa
def open_and_plot_audio(url, n_fft=2048, hop_length=512):
    """Load an audio file as mono and plot its waveform and spectrogram.

    The file is loaded at the notebook-level sampling rate ``sr`` and a
    two-panel figure is shown: the waveform against time in seconds on the
    left, and a log-frequency dB spectrogram (referenced to its maximum)
    on the right.

    Parameters
    ----------
    url : str or path-like
        Location of the audio file, passed unchanged to ``librosa.load``.
    n_fft : int, optional
        FFT size used for the STFT and for the spectrogram axes
        (default 2048).
    hop_length : int, optional
        Hop between STFT frames, in samples (default 512).

    Returns
    -------
    numpy.ndarray
        The loaded mono signal.
    """
    # Import the submodule explicitly: librosa releases without lazy
    # submodule loading do not expose `librosa.display` after a bare
    # `import librosa`, which would make the specshow call below fail.
    import librosa.display

    signal, _ = librosa.load(url, sr=sr, mono=True)

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4), dpi=150)

    # Left panel: waveform, with sample indices converted to seconds.
    ax1.plot(np.arange(len(signal)) / sr, signal)
    ax1.set_title('Audio Signal')
    ax1.set_xlabel('Time (s)')
    ax1.set_ylabel('Amplitude')

    # Right panel: magnitude spectrogram in dB relative to its peak.
    S = librosa.stft(signal, n_fft=n_fft, hop_length=hop_length)
    S_dB = librosa.amplitude_to_db(np.abs(S), ref=np.max)
    img = librosa.display.specshow(
        S_dB,
        sr=sr,
        n_fft=n_fft,
        hop_length=hop_length,
        x_axis='time',
        y_axis='log',
        ax=ax2,
    )
    fig.colorbar(img, ax=ax2, format='%+2.0f dB')
    ax2.set_title('Spectrogram')

    plt.tight_layout()
    plt.show()
    return signal
Example 1.¶
In [41]:
# Load audios/street.wav and display its waveform and spectrogram.
complete_signal = open_and_plot_audio("audios/street.wav")
In [42]:
# Keep the 5-second window running from second 8 to second 13.
real_mix = complete_signal[slice(8 * sr, 13 * sr)]

print("Real Mixture")
display(Audio(data=real_mix, rate=sr))
Real Mixture
In [43]:
# HPSS baseline, margin=2 variant. Only the first two returned values are
# used; the remaining two are discarded.
y_p_2, y_h_2, _, _ = hpss_module_2.forward(real_mix)

print("HPSS/Impulses")
display(Audio(data=y_p_2, rate=sr))

print("HPSS/Stationary Background")
display(Audio(data=y_h_2, rate=sr))
HPSS/Impulses
HPSS/Stationary Background
In [44]:
# Wavelet baseline: the background estimate is returned first, the
# impulsive estimate second.
wavelet_bkg, wavelet_impulse = wavelet_module.forward(real_mix)

print("Wavelet/Impulses")
display(Audio(data=wavelet_impulse, rate=sr))

print("Wavelet/Stationary Background")
display(Audio(data=wavelet_bkg, rate=sr))
Wavelet/Impulses
Wavelet/Stationary Background
In [45]:
# IS³
# Inference only: run the forward pass under no_grad so autograd does not
# record a graph for outputs that are merely played back and plotted.
with torch.no_grad():
    # reshape(1, -1) presents the excerpt as a 2-D tensor with one row.
    y_i, y_s = model.forward(torch.tensor(real_mix).reshape(1, -1))

print("IS3/Impulses")
display(Audio(y_i[0].detach().numpy(), rate=sr))
print("IS3/Stationary Background")
display(Audio(y_s[0].detach().numpy(), rate=sr))
IS3/Impulses
IS3/Stationary Background
In [46]:
# Four stacked panels sharing both axes, one per signal.
fig, axs = plt.subplots(
    nrows=4, ncols=1, figsize=(15, 9), sharex=True, sharey=True
)
fig.suptitle('Comparison of Impulse Separation Methods (Real Recording)')

# Panel 0: the input excerpt.
axs[0].plot(real_mix)
axs[0].set(title='Input Signal', ylabel='Amplitude')

# Panel 1: HPSS (margin=2) impulsive estimate.
axs[1].plot(y_p_2)
axs[1].set(title='HPSS (margin=2) Impulse', ylabel='Amplitude')

# Panel 2: wavelet impulsive estimate.
axs[2].plot(wavelet_impulse)
axs[2].set(title='Wavelet Impulse', ylabel='Amplitude')

# Panel 3: IS³ impulsive estimate (first row of the output tensor,
# converted to NumPy for plotting). Only this bottom panel labels the x-axis.
axs[3].plot(y_i[0].detach().numpy())
axs[3].set(title='IS³ Impulse', ylabel='Amplitude', xlabel='Sample')

plt.tight_layout()
plt.show()
In [47]:
# Four stacked panels sharing both axes, one per signal.
fig, axs = plt.subplots(
    nrows=4, ncols=1, figsize=(15, 9), sharex=True, sharey=True
)
fig.suptitle(
    'Comparison of Stationary Background Separation Methods (Real Recording)')

# Panel 0: the input excerpt.
axs[0].plot(real_mix)
axs[0].set(title='Input Signal', ylabel='Amplitude')

# Panel 1: HPSS (margin=2) background estimate.
axs[1].plot(y_h_2)
axs[1].set(title='HPSS (margin=2) Background', ylabel='Amplitude')

# Panel 2: wavelet background estimate.
axs[2].plot(wavelet_bkg)
axs[2].set(title='Wavelet Background', ylabel='Amplitude')

# Panel 3: IS³ background estimate (first row of the output tensor,
# converted to NumPy for plotting). Only this bottom panel labels the x-axis.
axs[3].plot(y_s[0].detach().numpy())
axs[3].set(title='IS³ Background', ylabel='Amplitude', xlabel='Sample')

plt.tight_layout()
plt.show()
Example 2.¶
In [48]:
# Load audios/cafe.wav and display its waveform and spectrogram.
complete_signal = open_and_plot_audio("audios/cafe.wav")
In [49]:
# Keep the 5-second window running from second 10 to second 15.
real_mix = complete_signal[slice(10 * sr, 15 * sr)]
display(Audio(data=real_mix, rate=sr))
In [50]:
# HPSS margin=1. (this cell uses `hpss_module`, the margin=1. instance, not
# `hpss_module_2`). Only the first two returned values are used.
y_p, y_h, _, _ = hpss_module.forward(real_mix)

print("HPSS/Impulses")
display(Audio(data=y_p, rate=sr))

print("HPSS/Stationary Background")
display(Audio(data=y_h, rate=sr))
HPSS/Impulses
HPSS/Stationary Background
In [51]:
# Wavelet baseline: the background estimate is returned first, the
# impulsive estimate second.
wavelet_bkg, wavelet_impulse = wavelet_module.forward(real_mix)

print("Wavelet/Impulses")
display(Audio(data=wavelet_impulse, rate=sr))

print("Wavelet/Stationary Background")
display(Audio(data=wavelet_bkg, rate=sr))
Wavelet/Impulses
Wavelet/Stationary Background
In [52]:
# IS³
# Inference only: run the forward pass under no_grad so autograd does not
# record a graph for outputs that are merely played back and plotted.
with torch.no_grad():
    # reshape(1, -1) presents the excerpt as a 2-D tensor with one row.
    y_i, y_s = model.forward(torch.tensor(real_mix).reshape(1, -1))

print("IS3/Impulses")
display(Audio(y_i[0].detach().numpy(), rate=sr))
print("IS3/Stationary Background")
display(Audio(y_s[0].detach().numpy(), rate=sr))
IS3/Impulses
IS3/Stationary Background
In [53]:
# Four stacked panels sharing both axes, one per signal.
fig, axs = plt.subplots(4, 1, figsize=(15, 9), sharex=True, sharey=True)
fig.suptitle('Comparison of Impulse Separation Methods (Real Recording)')

# Input waveform
axs[0].plot(real_mix)
axs[0].set_title('Input Signal')
axs[0].set_ylabel('Amplitude')

# Plot HPSS (margin=1) impulse.
# `y_p` is produced by `hpss_module` (margin=1.), so the panel title must
# say margin=1; it previously read "margin=2", a leftover from Example 1.
axs[1].plot(y_p)
axs[1].set_title('HPSS (margin=1) Impulse')
axs[1].set_ylabel('Amplitude')

# Plot Wavelet impulse
axs[2].plot(wavelet_impulse)
axs[2].set_title('Wavelet Impulse')
axs[2].set_ylabel('Amplitude')

# Plot IS3 impulse (first row of the output tensor, converted to NumPy)
axs[3].plot(y_i[0].detach().numpy())
axs[3].set_title('IS³ Impulse')
axs[3].set_ylabel('Amplitude')
axs[3].set_xlabel('Sample')

plt.tight_layout()
plt.show()
In [54]:
# Four stacked panels sharing both axes, one per signal.
fig, axs = plt.subplots(4, 1, figsize=(15, 9), sharex=True, sharey=True)
fig.suptitle(
    'Comparison of Stationary Background Separation Methods (Real Recording)')

# Input waveform
axs[0].plot(real_mix)
axs[0].set_title('Input Signal')
axs[0].set_ylabel('Amplitude')

# Plot HPSS (margin=1) background.
# `y_h` is produced by `hpss_module` (margin=1.), so the panel title must
# say margin=1; it previously read "margin=2", a leftover from Example 1.
axs[1].plot(y_h)
axs[1].set_title('HPSS (margin=1) Background')
axs[1].set_ylabel('Amplitude')

# Plot Wavelet background
axs[2].plot(wavelet_bkg)
axs[2].set_title('Wavelet Background')
axs[2].set_ylabel('Amplitude')

# Plot IS3 background (first row of the output tensor, converted to NumPy)
axs[3].plot(y_s[0].detach().numpy())
axs[3].set_title('IS³ Background')
axs[3].set_ylabel('Amplitude')
axs[3].set_xlabel('Sample')

plt.tight_layout()
plt.show()
In [ ]: